metalab_data

trimmed_data <- metalab_data %>% 
  filter(!dataset %in% c(# dataset to be replaced 
                         "Infant directed speech preference", 
                         
                         # dataset to be excluded 
                         "Phonotactic learning", 
                         "Pointing and vocabulary (concurrent)", 
                         "Pointing and vocabulary (longitudinal)",
                         "Video deficit", 
                         "Word segmentation (neuro)", 
                         
                         # dataset to be modified 
                         "Statistical sound category learning", 
                         "Gaze following (live)",
                         "Gaze following (video)",
                         "Function word segmentation", 
                         "Word segmentation (behavioral)"
                         
                         )) %>% 
  mutate(ds_clean = dataset) %>% 
  bind_rows(
    ids_data %>% mutate(expt_num = as.numeric(expt_num)), #IDS replacement 

    # modifying statistical sound category learning 
    metalab_data %>% 
    filter(dataset == "Statistical sound category learning", exposure_phase == "habituation") %>% 
    mutate(ds_clean = "Statistical sound category learning (habituation)"), 
    
    #combining gaze following 
    metalab_data %>% 
      filter(dataset %in% c("Gaze following (live)","Gaze following (video)")) %>% 
      mutate(ds_clean = "Gaze following (combined)"),
    
    #combining function word segmentation 
    metalab_data %>% 
      filter(dataset %in% c("Function word segmentation", "Word segmentation (behavioral)")) %>% 
      mutate(ds_clean = "Word Segmentation (combined)")
    
  )
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion

basic stats

n of effect size

trimmed_data %>% 
  group_by(ds_clean) %>% 
  count() %>% 
  ggplot(aes(x = reorder(ds_clean, -n), y = n)) + 
  geom_point()+ 
  theme_classic() + 
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

n sample sizes

trimmed_n <- trimmed_data %>% 
  mutate(n_2 = ifelse(is.na(n_2), 0, n_2)) %>% 
  distinct(ds_clean, same_infant, n_1, n_2) %>% 
  mutate(total_infants = n_1 + n_2) %>% 
  group_by(ds_clean) %>% 
  summarise(n = sum(total_infants)) %>% 
  arrange(n) 

trimmed_n %>% 
  ggplot(aes(reorder(ds_clean,-n), n)) + 
  geom_point() + 
  theme_classic() + 
  theme() + 
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

do we worry about the potential outliers here? ds that has small effect size n but contributes a lot of sample n?

trimmed_data %>% 
  group_by(ds_clean) %>% 
  count() %>% 
  rename(effect_size_n = n) %>% 
  left_join(trimmed_n %>% rename(sample_size = n), by = "ds_clean") %>% 
  ggplot(aes(x = effect_size_n, 
             y = sample_size)) + 
  geom_point() + 
  geom_smooth(method = "lm")
## `geom_smooth()` using formula 'y ~ x'

method distribution

response mode

trimmed_data %>% 
  group_by(ds_clean, response_mode) %>% 
  count() %>% 
  ungroup() %>% 
  left_join(
    trimmed_data %>% 
  group_by(ds_clean) %>% summarise(n_row = n()), 
  by = "ds_clean"
  ) %>% 
  mutate(proportion_response_mode = n/ n_row) %>% 
  
  ggplot(aes(x = reorder(response_mode, -proportion_response_mode), y = proportion_response_mode)) + 
  geom_point() + 
  facet_wrap(~ds_clean) + 
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1), 
        strip.text.x = element_text(size = 7)) + 
  labs(title = "Response mode")

method

trimmed_data %>% 
  group_by(ds_clean, method) %>% 
  count() %>% 
  ungroup() %>% 
  left_join(
    trimmed_data %>% 
  group_by(ds_clean) %>% summarise(n_row = n()), 
  by = "ds_clean"
  ) %>% 
  mutate(proportion_method = n/ n_row) %>% 
  
  ggplot(aes(x = reorder(method, -proportion_method), y = proportion_method)) + 
  geom_point() + 
  facet_wrap(~ds_clean) + 
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1), 
        strip.text.x = element_text(size = 7)) + 
  labs(title = "method")

dependent_measure

trimmed_data %>% 
  group_by(ds_clean, dependent_measure) %>% 
  count() %>% 
  ungroup() %>% 
  left_join(
    trimmed_data %>% 
  group_by(ds_clean) %>% summarise(n_row = n()), 
  by = "ds_clean"
  ) %>% 
  mutate(proportion_dm = n/ n_row) %>% 
  ggplot(aes(x = dependent_measure, y = proportion_dm)) + 
  geom_point() + 
  facet_wrap(~ds_clean) + 
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1), 
        strip.text.x = element_text(size = 7)) + 
  labs(title = "dependent_measure")

ok everything above looks very messy, let’s create some birdeye view on all methods & do some recoding

trimmed_data %>% 
  mutate(mega_method = paste(response_mode, method, dependent_measure, sep = "_")) %>% 
  distinct(ds_clean, mega_method) %>% 
  group_by(ds_clean) %>% 
  count() %>% 
  ggplot(aes(reorder(ds_clean,-n), n)) + 
  geom_point() + 
  theme_classic() + 
  theme() + 
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) + 
  labs(title = "unique mega_method")

trimmed_data %>% 
  mutate(mega_method = paste(response_mode, method, dependent_measure, sep = "_")) %>% 
  distinct(ds_clean, mega_method)
## # A tibble: 85 × 2
##    ds_clean                            mega_method                              
##    <chr>                               <chr>                                    
##  1 Label advantage in concept learning eye-tracking_forced-choice_looking_time  
##  2 Label advantage in concept learning behavior_forced-choice_target_selection  
##  3 Label advantage in concept learning behavior_forced-choice_exploration_time  
##  4 Vowel discrimination (native)       eye-tracking_anticipatory eye movements_…
##  5 Vowel discrimination (native)       eye-tracking_stimulus alternation_lookin…
##  6 Vowel discrimination (native)       eye-tracking_central fixation_looking_ti…
##  7 Vowel discrimination (native)       behavior_conditioned head-turn_pc_head_t…
##  8 Vowel discrimination (native)       eye-tracking_head-turn preference proced…
##  9 Vowel discrimination (native)       EEG_oddball_peak_amplitude               
## 10 Vowel discrimination (native)       behavior_high-amplitude sucking_sucking_…
## # … with 75 more rows

response mode doesn’t feel like a very meaningful categories?

trimmed_data %>% 
  distinct(ds_clean, response_mode) 
## # A tibble: 45 × 2
##    response_mode ds_clean                           
##    <chr>         <chr>                              
##  1 eye-tracking  Label advantage in concept learning
##  2 behavior      Label advantage in concept learning
##  3 eye-tracking  Vowel discrimination (native)      
##  4 behavior      Vowel discrimination (native)      
##  5 EEG           Vowel discrimination (native)      
##  6 NIRS          Vowel discrimination (native)      
##  7 behavior      Vowel discrimination (non-native)  
##  8 eye-tracking  Vowel discrimination (non-native)  
##  9 NIRS          Vowel discrimination (non-native)  
## 10 EEG           Vowel discrimination (non-native)  
## # … with 35 more rows

importing some recoding of the method (categorize based on look vs no-look)

m_recode <- read_csv(here("data/ds_clean_rework.csv"))
## Rows: 85 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): ds_clean, response_mode, method, dependent_measure, looking, note
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
trimmed_data <- m_recode %>% 
  select(-note) %>% 
  left_join(trimmed_data, by = c("ds_clean", "response_mode", "method", "dependent_measure"))

actually didn’t see much “looking” for young children trend

filter(trimmed_data, 
       mean_age_months < 36) %>%
  ggplot(aes(x = mean_age_months, y = d_calc, 
             weight = 1/d_var_calc)) +
  geom_point(aes(size = 1/d_var_calc,  
             color = looking),
             alpha = .3) + 
  facet_wrap(~ ds_clean) + 
  geom_hline(yintercept = 0, lty = 2, col = "black") + 
  xlab("Mean age (months)") +
  ylab("Effect size (d)") +
  ggthemes::theme_few()

let’s see familiarization vs habituation vs test only in paradigms measuring looking

filter(trimmed_data, 
       mean_age_months < 36)  %>% 
  filter(exposure_phase %in% c("familiarization", "habituation", "test_only"), 
         looking == "yes") %>% 
  ggplot(aes(x = mean_age_months, y = d_calc, 
             weight = 1/d_var_calc)) +
  geom_point(aes(size = 1/d_var_calc,  
             color = exposure_phase),
             alpha = .3) + 
  facet_wrap( ~ ds_clean) + 
  geom_hline(yintercept = 0, lty = 2, col = "black") + 
  xlab("Mean age (months)") +
  ylab("Effect size (d)") +
  ggthemes::theme_few()

infant type distribution

it_recode <- read_csv(here("data/infant_type_rework.csv"))
## Rows: 157 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): ds_clean, native_lang, infant_type, infant_type_clinical (typical v...
## lgl (2): long_cite, infant_native_lang
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
trimmed_data <- it_recode %>% 
  select(-long_cite) %>% 
  left_join(trimmed_data, by = c("ds_clean", "native_lang", "infant_type")) %>% 
  rename(infant_type_clincal =  `infant_type_clinical (typical vs nt (preterm / delayed / developmental disorder)`)

very little to be meaningful, should probably prune?

filter(trimmed_data, 
       mean_age_months < 36)  %>% 
  ggplot(aes(x = mean_age_months, y = d_calc, 
             weight = 1/d_var_calc)) +
  geom_point(aes(size = 1/d_var_calc,  
             color = infant_type_clincal),
             alpha = .3) + 
  facet_wrap( ~ ds_clean) + 
  geom_hline(yintercept = 0, lty = 2, col = "black") + 
  xlab("Mean age (months)") +
  ylab("Effect size (d)") +
  ggthemes::theme_few() 

this has a spread, may consider recode something?

filter(trimmed_data, 
       mean_age_months < 36)  %>% 
  ggplot(aes(x = mean_age_months, y = d_calc, 
             weight = 1/d_var_calc)) +
  geom_point(aes(size = 1/d_var_calc,  
             color = infant_type_language),
             alpha = .3) + 
  facet_wrap( ~ ds_clean) + 
  geom_hline(yintercept = 0, lty = 2, col = "black") + 
  xlab("Mean age (months)") +
  ylab("Effect size (d)") +
  ggthemes::theme_few() 

publicaion year?

citation density?

trimmed_data  %>% 
  ggplot(aes(x = year, y = d_calc, color = publication_type)) +
  geom_point() + 
  facet_wrap( ~ ds_clean) + 
  geom_hline(yintercept = 0, lty = 2, col = "black") + 
  xlab("publishing year") +
  ylab("Effect size (d)") +
  ggthemes::theme_few() 
## Warning: Removed 524 rows containing missing values (geom_point).

filter(trimmed_data, 
       mean_age_months < 36) %>%
  ggplot(aes(x = mean_age_months, y = d_calc, 
             weight = 1/d_var_calc)) +
  geom_point(aes(size = 1/d_var_calc),
             alpha = .3) + 
  geom_smooth(method="lm", formula = y ~ x, 
              aes(col = "Linear"), 
              se = FALSE) + 
  geom_smooth(method="lm", formula = y ~ log(x), 
              aes(col = "Log"),
              se = FALSE) + 
  geom_smooth(method="lm", formula = y ~ I(x^2),
              aes(col = "Quadratic"),
              se = FALSE) +
  facet_wrap(~ ds_clean) + 
  geom_hline(yintercept = 0, lty = 2, col = "black") + 
  xlab("Mean age (months)") +
  ylab("Effect size (d)") +
  scale_colour_solarized(name="Models", breaks = c("Linear", "Log",
                                                "Quadratic", "Linear and Log"),
                                     labels=c("Linear" = "Linear",
                                              "Log" = "Log",
                                              "Quadratic" = "Quadratic",
                                              "Linear and Log" = "Linear and Log")) +
  ggthemes::theme_few()